# Read the board-game CSV into `board_games`.
# NOTE(review): the path is absolute (looks like an RStudio Cloud project
# directory); confirm it exists wherever this document is knitted.
board_games <- readr::read_csv(
  file = "/cloud/project/data/board_games.csv"
)
## Rows: 10532 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): description, image, name, thumbnail, artist, category, compilation...
## dbl (10): game_id, max_players, max_playtime, min_age, min_players, min_play...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print per-column summary statistics for the built-in `cars` data set.
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Show the games ordered from the largest `max_playtime` to the smallest.
arrange(board_games, desc(max_playtime))
## # A tibble: 10,532 × 22
## game_id description image max_players max_playtime min_age min_players
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 4815 This is a war g… //cf.g… 10 60000 14 8
## 2 46669 (from GMT websi… //cf.g… 4 17280 0 2
## 3 254 (from the back … //cf.g… 7 12000 14 2
## 4 6942 Introduction:&#… //cf.g… 4 12000 12 2
## 5 1499 (from ADG websi… //cf.g… 7 6000 12 2
## 6 5622 Pacific War is … //cf.g… 2 6000 16 2
## 7 38578 Age of Muskets … //cf.g… 6 6000 0 2
## 8 173504 The Greatest Da… //cf.g… 8 6000 12 2
## 9 5651 The Longest Day… //cf.g… 8 5400 12 2
## 10 13532 (from the box:)… //cf.g… 2 4500 12 2
## # … with 10,522 more rows, and 15 more variables: min_playtime <dbl>,
## # name <chr>, playing_time <dbl>, thumbnail <chr>, year_published <dbl>,
## # artist <chr>, category <chr>, compilation <chr>, designer <chr>,
## # expansion <chr>, family <chr>, mechanic <chr>, publisher <chr>,
## # average_rating <dbl>, users_rated <dbl>
# Add `playtime_group`: a text label for the duration band that each game's
# `playing_time` falls into.
#
# `case_when()` checks the conditions from top to bottom and keeps the first
# match, so each line only needs the inclusive upper bound of its band. This
# keeps the bands contiguous: a fractional value such as 20.5 lands in
# "20-40 minutes" instead of falling through a gap between integer ranges and
# becoming NA. Whole-number inputs get the same label as before.
# A missing `playing_time` matches no condition and stays NA.
board_games <- board_games %>%
  mutate(
    playtime_group = case_when(
      playing_time < 1 ~ "N/A",
      playing_time <= 20 ~ "Under 20 minutes",
      playing_time <= 40 ~ "20-40 minutes",
      playing_time <= 60 ~ "40-60 minutes",
      playing_time <= 120 ~ "1-2 hours",
      playing_time <= 180 ~ "2-3 hours",
      playing_time <= 240 ~ "3-4 hours",
      playing_time <= 360 ~ "4-6 hours",
      playing_time > 360 ~ "Over 6 hours"
    )
  )
# Number of games in each playtime band, most common band first.
board_games %>%
  count(playtime_group, sort = TRUE)
## # A tibble: 9 × 2
## playtime_group n
## <chr> <int>
## 1 40-60 minutes 2819
## 2 20-40 minutes 2210
## 3 Under 20 minutes 1972
## 4 1-2 hours 1896
## 5 2-3 hours 528
## 6 N/A 350
## 7 3-4 hours 344
## 8 4-6 hours 312
## 9 Over 6 hours 101
# Disabled chunk:
# board_games %>% filter(playing_time < 1000) %>% arrange(desc(playing_time))
# stacked barplot
# Violin plot of `average_rating` within each playtime band.
#
# `playtime_group` is a character column, so ggplot would lay the bands out
# alphabetically ("1-2 hours", "2-3 hours", "20-40 minutes", ...). Converting
# it to a factor with explicit levels puts the bands in order of duration on
# the axis and in the legend. The placeholder title and axis labels are
# replaced with descriptive ones.
board_games %>%
  mutate(
    playtime_group = factor(
      playtime_group,
      levels = c(
        "N/A",
        "Under 20 minutes",
        "20-40 minutes",
        "40-60 minutes",
        "1-2 hours",
        "2-3 hours",
        "3-4 hours",
        "4-6 hours",
        "Over 6 hours"
      )
    )
  ) %>%
  ggplot(aes(x = playtime_group, y = average_rating, fill = playtime_group)) +
  geom_violin() +
  labs(
    title = "Average rating by playing time",
    fill = "Playtime Group",
    x = "Playing time",
    y = "Average rating"
  ) +
  # Slanted tick labels so the long band names do not overlap.
  theme(axis.text.x = element_text(angle = -45, hjust = 0))
Research questions: Does the relationship between playing time and average rating change with recommended age? Does it change across game categories?
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(tidyverse)
library(heatmaply)
##
## ======================
## Welcome to heatmaply version 1.3.0
##
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
##
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags:
## https://stackoverflow.com/questions/tagged/heatmaply
## ======================
# Heatmap Prep - Counts ------------------------------------------------------------
# fix filter stuff:
# Split the comma-separated `category` string into up to four columns,
# `category1` .. `category4`.
#
# `extra = "drop"` and `fill = "right"` spell out what the defaults already
# did with a warning: pieces beyond the fourth are discarded, and rows with
# fewer than four pieces are padded with NA on the right. The result is
# unchanged; only the two warnings go away.
categories <- board_games %>%
  select(category) %>%
  separate(
    category,
    into = c("category1", "category2", "category3", "category4"),
    sep = ",",
    extra = "drop",
    fill = "right"
  )
## Warning: Expected 4 pieces. Additional pieces discarded in 978 rows [23, 29,
## 30, 37, 52, 99, 106, 128, 132, 186, 190, 194, 195, 199, 205, 207, 216, 223, 233,
## 235, ...].
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 8198 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, ...].
# Count how often each (category1, category2) pair occurs, most frequent pair
# first. Rows missing either category are excluded.
#
# `count(..., sort = TRUE)` yields the same rows, columns and order as the
# earlier group_by() + count() + arrange(desc(n)) chain, but returns a plain
# ungrouped tibble, so later verbs are not silently applied per group.
cat_counts <- categories %>%
  filter(!is.na(category1), !is.na(category2)) %>%
  count(category1, category2, sort = TRUE)
# Frequency of each value in the first category slot, most frequent first.
# Rows with a missing `category1` are counted as their own NA row.
# `count(sort = TRUE)` returns an ungrouped tibble with the same rows and
# order as group_by() + count() + arrange(desc(n)).
top_cat1 <- categories %>%
  count(category1, sort = TRUE)
# Frequency of each value in the second category slot, most frequent first.
# Rows with a missing `category2` are counted as their own NA row.
# `count(sort = TRUE)` returns an ungrouped tibble with the same rows and
# order as group_by() + count() + arrange(desc(n)).
top_cat2 <- categories %>%
  count(category2, sort = TRUE)
# Hard-coded set of 20 first-slot categories kept for the heatmap.
# NOTE(review): presumably copied from the top of `top_cat1`; confirm the
# list is still current if the data changes.
top_cat1_list <- c(
  "Card Game",
  "Abstract Strategy",
  "Wargame",
  "Economic",
  "Adventure",
  "Bluffing",
  "Action / Dexterity",
  "Animals",
  "Dice",
  "Ancient",
  "Fantasy",
  "Children's Game",
  "City Building",
  "Party Game",
  "Deduction",
  "Aviation / Flight",
  "Medieval",
  "Fighting",
  "Napoleonic",
  "American Civil War"
)
# Hard-coded set of 20 second-slot categories.
# NOTE(review): presumably copied from the top of `top_cat2`; confirm.
# NOTE(review): "Movies / TV/ Radio Theme" has irregular spacing and
# capitalisation; verify it matches the category string in the data exactly,
# otherwise `%in%` will never match it.
top_cat2_list <- c(
  "Wargame",
  "Card Game",
  "Fantasy",
  "World War II",
  "Children's Game",
  "Dice",
  "Fighting",
  "Economic",
  "Science Fiction",
  "Party Game",
  "Deduction",
  "Exploration",
  "Humor",
  "Medieval",
  "Collectible Components",
  "Racing",
  "Movies / TV/ Radio Theme",
  "Negotiation",
  "Miniatures",
  "Animals"
)
# Restrict the pair counts to the categories chosen for the heatmap:
# `category1` must be in `top_cat1_list` and `category2` must be in
# `top_cat2_list`.
# The second test used to check `top_cat1_list` as well, which left
# `top_cat2_list` unused and filtered the second slot by the wrong set.
map_counts <- cat_counts %>%
  filter(
    category1 %in% top_cat1_list,
    category2 %in% top_cat2_list
  )
# Heatmap Prep - Ratings --------------------------------------------------
# Keep each game's name and average rating, and split its comma-separated
# `category` string into up to four columns, `category1` .. `category4`.
#
# `extra = "drop"` and `fill = "right"` spell out what the defaults already
# did with a warning: pieces beyond the fourth are discarded, and rows with
# fewer than four pieces are padded with NA on the right. The result is
# unchanged; only the two warnings go away.
rat_categories <- board_games %>%
  select(category, average_rating, name) %>%
  separate(
    category,
    into = c("category1", "category2", "category3", "category4"),
    sep = ",",
    extra = "drop",
    fill = "right"
  )
## Warning: Expected 4 pieces. Additional pieces discarded in 978 rows [23, 29,
## 30, 37, 52, 99, 106, 128, 132, 186, 190, 194, 195, 199, 205, 207, 216, 223, 233,
## 235, ...].
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 8198 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, ...].
# Mean `average_rating` and number of games for each (category1, category2)
# pair, highest mean first. Rows missing either category are excluded.
#
# `.groups = "drop"` makes the grouping of the result explicit: the summary
# comes back ungrouped (it used to stay grouped by `category1`), and the
# "`summarise()` has grouped output by ..." message is no longer emitted.
# Rows, columns and order are unchanged.
rat_averages <- rat_categories %>%
  filter(!is.na(category1), !is.na(category2)) %>%
  group_by(category1, category2) %>%
  summarize(
    avg_rat = mean(average_rating),
    count = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_rat))
## `summarise()` has grouped output by 'category1'. You can override using the `.groups` argument.
# Six fill colours for the heatmap, from white to dark purple.
colors <- c("#ffffff", "#f2f0f7", "#cbc9e2", "#9e9ac8", "#756bb1", "#54278f")

# Tile plot of the pair counts in `map_counts`: second category on the x axis,
# first category on the y axis, tile fill taken from the count `n` on a
# stepped colour scale. The plot is stored in `heatmap`; it is not printed
# here.
heatmap <- ggplot(map_counts, aes(x = category2, y = category1, fill = n)) +
  geom_tile() +
  scale_fill_stepsn(
    colors = colors,
    values = c(0, 0.01, 0.1, 0.25, 0.6, 1)
  )
# Gamesonyourphone_ %>%
# group_by(game_mechanic_pop) %>%
# filter(!is.na(game_mechanic_pop)) %>%
# summarize(count = n())%>%
# arrange(desc(count)) %>%
# mutate(proportion = (count/15)*100)
# Gamesonyourphone_ %>%
# group_by(game_mechanic_pop) %>%
# filter(!is.na(game_mechanic_pop)) %>%
# mutate(count = n(),
# proportion = (count/15)*100) %>%
# ggplot(mapping = aes(x = "", y = reorder(game_mechanic_pop, -proportion), fill = proportion, color = proportion)) +
# geom_bar(stat = "identity") +
# coord_polar(theta = "y", start = 0)+
# labs(
# title = "Bike rentals in DC",
# subtitle = "by season",
# x = "Season",
# y = "Number of bikes rented")